{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b050edae-48cf-47ba-b2ae-bf9fc9098bb4",
   "metadata": {},
   "source": [
    "## Word Position Swapping (Shuffling) for Text Augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0ab61d1f-466f-4469-99da-b172a959c2cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Author: Sebastian Raschka\n",
      "\n",
      "Python implementation: CPython\n",
      "Python version       : 3.10.6\n",
      "IPython version      : 8.12.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%load_ext watermark\n",
    "%watermark -a 'Sebastian Raschka' -v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1f2926e6-aeac-4a9b-b762-f8a4a891d5b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import re\n",
    "\n",
    "\n",
    "def sentence_order_shuffling(text):\n",
    "\n",
    "    # split upon period or question mark:\n",
    "    sentences = re.split(r'[.!?] ', text)\n",
    "    random.shuffle(sentences)\n",
    "    return '. '.join(sentences)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f3bfef8-58ca-45c8-9762-9be6b23b06ef",
   "metadata": {},
   "source": [
    "**Random swapping with a 20% swapping rate**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1820de77-bd90-4247-a7d3-1ff7dc429ff4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original Paragraph:\n",
      " The cat quickly jumped over the lazy dog. It was a sunny day, and the park was full of people. The children were playing, and the birds were singing.\n",
      "\n",
      "Augmented Paragraph:\n",
      " It was a sunny day, and the park was full of people. The children were playing, and the birds were singing.. The cat quickly jumped over the lazy dog\n"
     ]
    }
   ],
   "source": [
    "random.seed(1)\n",
    "\n",
    "paragraph = (\"The cat quickly jumped over the lazy dog. \"\n",
    "             \"It was a sunny day, and the park was full of people. \"\n",
    "             \"The children were playing, and the birds were singing.\")\n",
    "\n",
    "augmented_paragraph = sentence_order_shuffling(paragraph)\n",
    "\n",
    "print(\"Original Paragraph:\\n\", paragraph)\n",
    "print(\"\\nAugmented Paragraph:\\n\", augmented_paragraph)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "47edf286-4825-4274-94b4-d10423767646",
   "metadata": {},
   "source": [
    "**Show difference before and after**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "823a4b6b-5c33-4fd5-ade7-3b718d902ad4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "- The\n",
      "- cat\n",
      "- quickly\n",
      "- jumped\n",
      "- over\n",
      "- the\n",
      "- lazy\n",
      "- dog.\n",
      "  It\n",
      "  was\n",
      "  a\n",
      "  sunny\n",
      "  day,\n",
      "  and\n",
      "  the\n",
      "  park\n",
      "  was\n",
      "  full\n",
      "  of\n",
      "  people.\n",
      "  The\n",
      "  children\n",
      "  were\n",
      "  playing,\n",
      "  and\n",
      "  the\n",
      "  birds\n",
      "  were\n",
      "- singing.\n",
      "+ singing..\n",
      "?         +\n",
      "\n",
      "+ The\n",
      "+ cat\n",
      "+ quickly\n",
      "+ jumped\n",
      "+ over\n",
      "+ the\n",
      "+ lazy\n",
      "+ dog\n"
     ]
    }
   ],
   "source": [
    "import difflib\n",
    "\n",
    "\n",
    "d = difflib.Differ()\n",
    "diff = d.compare(paragraph.split(), augmented_paragraph.split())\n",
    "\n",
    "print('\\n'.join(diff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71f2c7d2-f99f-4fb2-9c80-1c62bb543b0c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
